# Importing Required Libraries
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
import tensorflow
import cv2
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras import datasets, models, layers, optimizers
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from google.colab.patches import cv2_imshow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv2D, Dropout, MaxPool2D,MaxPooling2D,GlobalMaxPooling2D
from keras.optimizers import RMSprop,Adam
from sklearn.metrics import classification_report, confusion_matrix
# Load the labels dataset and take a first look at it.
seed_labels = pd.read_csv("/content/sample_data/Labels.csv")
seed_labels.info()
seed_labels.shape
# Cast the object-dtype Label column to a pandas categorical.
seed_labels["Label"] = pd.Categorical(seed_labels["Label"])
seed_labels.info()
# List the distinct label values present in the data.
seed_labels['Label'].unique()
# Class-balance bar chart: one bar per seedling label.
sns.set(rc={'figure.figsize': (25, 7)})
ax = sns.countplot(x='Label', data=seed_labels)
# Loading Images Data
seed_images = np.load("/content/sample_data/images.npy")
# Checking shape of the data
seed_images.shape
# Preview a handful of images at regular intervals to sanity-check the data.
for img in range(1, 4769, 1000):
    plt.grid(None)
    plt.imshow(seed_images[img], cmap='gray')
    plt.title(seed_labels.Label[img])
    plt.show()
# Per-channel colour histogram of the last previewed image.
# FIX: cv2.calcHist expects a LIST of source images as its first argument;
# passing the bare ndarray made OpenCV misinterpret the input.
color = ('b', 'g', 'r')
plt.figure()
for i, col in enumerate(color):
    histr = cv2.calcHist([seed_images[img]], [i], None, [256], [0, 256])
    plt.plot(histr, color=col)
    plt.xlim([0, 256])
plt.show()
# Converting into float32 and normalizing pixel values into [0, 1].
seed_images = seed_images.astype('float32')
seed_images /= 255
print("seed_images shape:", seed_images.shape)
print("Images in seed_images:", seed_images.shape[0])
print("Max value in seed_images:", seed_images.max())
print("Min value in seed_images:", seed_images.min())
# Gaussian smoothing helper used to denoise the seedling images.
def blur_image(img):
    """Return *img* smoothed with a 5x5 Gaussian kernel (sigma = 3).

    The previous version also computed an unsharp-mask blend via
    cv2.addWeighted but never used it (its return was commented out);
    that dead computation has been removed. Behavior is unchanged.
    """
    return cv2.GaussianBlur(img, (5, 5), 3)
# Apply Gaussian blurring to every image. The output buffer shape is
# derived from the data itself instead of the hard-coded (4750, 128, 128, 3),
# so the pipeline keeps working if the dataset size changes.
blur_images = np.empty(seed_images.shape)
for indx, image in enumerate(seed_images):
    try:
        blur_images[indx] = blur_image(image)
    except Exception as ex:
        # Best-effort: report the failing index and keep processing.
        print(indx)
        print(ex)
        continue
# Spot-check the blurred images at the same sample indices as before.
for img in range(1, 4769, 1000):
    plt.grid(None)
    plt.imshow(blur_images[img], cmap='gray')
    plt.title(seed_labels.Label[img])
    plt.show()
# Side-by-side comparison per sample: original (left) vs. Gaussian-blurred (right).
for img in range(1, 4769, 1000):
    fig, axs = plt.subplots(1, 2, figsize=(10, 10))
    axs[0].imshow(seed_images[img])
    axs[1].imshow(blur_images[img])
# One-hot encode the class labels.
# FIX: fit on the Label column explicitly rather than the whole DataFrame,
# so additional columns in Labels.csv can never leak into the encoding.
enc = LabelBinarizer()
labels = enc.fit_transform(seed_labels["Label"])
labels[0]
# Split the data into train (70%) and a held-out 30% portion (test size 0.3).
X_train, X_test, y_train, y_test = train_test_split(
    blur_images, labels, test_size=0.3, random_state=1)
print("Shape of X_train.shape:", X_train.shape)
print("Shape of X_test.shape:", X_test.shape)
print("Shape of y_train.shape:", y_train.shape)
print("Shape of y_test.shape:", y_test.shape)
print("y_train[0]:", y_train[0])
# Split the held-out portion evenly into final test and validation sets.
X_test, X_val, y_test, y_val = train_test_split(
    X_test, y_test, test_size=0.5, random_state=1)
print("Shape of X_test.shape:", X_test.shape)
print("Shape of X_val.shape:", X_val.shape)
print("Shape of y_test.shape:", y_test.shape)
print("Shape of y_val.shape:", y_val.shape)
print("Shape of X_train.shape:", X_train.shape)
print("Shape of X_test.shape:", X_test.shape)
print("Shape of X_val.shape:", X_val.shape)
# Set the CNN model: three conv stages (32 -> 64 -> 128 filters), global
# max pooling, then a dense head for the 12 seedling classes.
from tensorflow.keras.models import Sequential
model = Sequential()
# `input_shape` is the idiomatic Keras spelling; the old
# `batch_input_shape=(None, 128, 128, 3)` needed a dummy batch_size
# variable for no benefit. Padding is normalized to lowercase 'same'
# throughout for consistency.
model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                 activation='relu', input_shape=(128, 128, 3)))
model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                 activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same',
                 activation='relu'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same',
                 activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(GlobalMaxPooling2D())
model.add(Dense(256, activation="relu"))
model.add(Dense(12, activation="softmax"))
model.summary()
# Optimizer: Adam; loss: categorical cross-entropy (labels are one-hot).
# FIX: `learning_rate` replaces the deprecated `lr` keyword, which newer
# Keras versions reject outright.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="categorical_crossentropy", metrics=["accuracy"])
# Data augmentation to reduce overfitting.
datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.1,
                             width_shift_range=0.2, height_shift_range=0.2,
                             horizontal_flip=True, vertical_flip=True)
datagen.fit(X_train)
batch_size = 32
epochs = 22
# Stop early once validation accuracy stops improving by at least 0.01.
callback = tensorflow.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=3, min_delta=0.01)
# FIX: Model.fit accepts generators directly; fit_generator is
# deprecated and removed in recent TensorFlow releases.
history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
                    epochs=epochs, validation_data=(X_val, y_val),
                    verbose=1, callbacks=[callback])
# Model evaluation on the untouched test split.
score = model.evaluate(X_test, y_test, batch_size=38, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Collect the per-epoch learning history into a DataFrame.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
# Loss curves: training vs. validation per epoch.
plt.plot(hist['loss'])
plt.plot(hist['val_loss'])
plt.legend(("train loss", "validation loss"), loc=0)
plt.show()
# Accuracy curves: training vs. validation per epoch.
plt.plot(hist['accuracy'])
plt.plot(hist['val_accuracy'])
plt.legend(("train accuracy", "validation accuracy"), loc=0)
plt.show()
# Confusion matrix on the test split.
y_pred = model.predict(X_test)
y_class = np.argmax(y_pred, axis=1)
y_check = np.argmax(y_test, axis=1)
cmatrix = confusion_matrix(y_check, y_class)
print(cmatrix)
# Heatmap of the confusion matrix for an at-a-glance per-label view.
df_cm = pd.DataFrame(cmatrix, index=list(range(12)), columns=list(range(12)))
plt.figure(figsize=(12, 7))
sns.heatmap(df_cm, annot=True, fmt='d')
# The model performed well on all seedling classes except for confusion
# between Black-grass and Loose Silky-bent: 14 Black-grass images were
# predicted as Loose Silky-bent.
# Visualize predictions for X_test[2], X_test[3], X_test[33], X_test[36],
# and X_test[59].
# Predicted class indices for the whole test set.
# FIX: Sequential.predict_classes was removed in TensorFlow 2.6+; taking
# the argmax of the softmax probabilities is the documented replacement
# and yields identical results.
Predicted_classes = np.argmax(model.predict(X_test), axis=1)
enc.classes_
Predicted_classes
# Spot-check a handful of test images against their predicted labels.
for i in [2, 3, 33, 36, 59]:
    act = np.argmax(y_test[i])
    print("i value:", i)
    print("Predicted class: {}".format(enc.classes_[Predicted_classes[i]]))
    print("Actual class: {}".format(enc.classes_[act]))
    plt.imshow(X_test[i])
    plt.show()